
*********************************************************************
************  PROBLEMS WITH PARTNER'S INFORMATION (1) ***************
*********************************************************************


/*****************************************************************************************
* MATCHING INDIVIDUALS WITHIN A HOUSEHOLD                                                *
* In this example we match the information of respondents living with partners/spouses  *
* onto that of their partners/spouses.                                                   *
*****************************************************************************************/


* Directory locations used by this do-file, stored as global macros.

* Folders holding the raw UKHLS and BHPS Stata files
global inpath_ukhls "G:\Mi unidad\Word\Datos\Understanding Society\US Waves 1_14 (2009_22) BHPS Waves 1_18 (1991_2009)\stata\stata13_se\ukhls"
global inpath_bhps "G:\Mi unidad\Word\Datos\Understanding Society\US Waves 1_14 (2009_22) BHPS Waves 1_18 (1991_2009)\stata\stata13_se\bhps"

* Project folder for the derived ("net") data files
global inpath_net "G:\Mi unidad\Word\Investigación\Proyectos en curso\Ministerio Ccia Innovac (2020)\Datos Understanding Society\Datos net"

* Work inside the project folder, so that files referenced without a path
* are read from and written to it
cd "$inpath_net"


// Build one trimmed extract per wave from the individual-response files.
// For each wave letter: load the indresp file, keep the variables of
// interest, tag the wave, strip the wave prefix, convert the negative
// missing-value codes and save a temporary per-wave file.
local wave_letters "a b c d e f g h i j k l m n"
foreach w of local wave_letters {
	use "$inpath_ukhls/`w'_indresp", clear

	// isvar is not a built-in Stata command (presumably the SSC package;
	// confirm it is installed). Its r(varlist) result is used below as the
	// list of variables to keep.
	isvar `w'_sex_dv `w'_age_dv `w'_agegr10_dv `w'_agegr13_dv `w'_mastat_dv ///
		`w'_ppno `w'_scopfamb `w'_scopfamf `w'_scopfamd `w'_jbstat ///
		`w'_hiqual_dv `w'_jbisco88 `w'_jbsoc00_cc `w'_jbsoc10_cc ///
		`w'_jobdeny `w'_stendreas `w'_nxtendreas `w'_jbiindb_dv ///
		`w'_jbnssec3_dv `w'_jbnssec5_dv `w'_jbnssec8_dv `w'_jbnssec_dv ///
		`w'_qfhigh_dv `w'_jbseg_dv `w'_jbrgsc_dv `w'_jbisco88_cc ///
		`w'_hidp `w'_pid `w'_pidp `w'_ppid
	keep `r(varlist)'

	// Wave number = position of the wave letter in the alphabet string
	// (a = 1, ..., n = 14); needed once the files are stacked in long format
	generate wave = strpos("abcdefghijklmn","`w'")

	// Strip the wave prefix so variable names are identical across waves
	rename `w'_* *

	// Recode the negative codes -21, -20 and -11 through -1 to Stata
	// system missing in every variable
	mvdecode _all, mv(-21, -20, -11/-1)

	// Temporary per-wave file, written to the current working directory
	save `w'pjunk.dta, replace
}


// Stack the per-wave extracts into one long file: start from wave a and
// append waves b to n in order.
use apjunk, clear
local later_waves "b c d e f g h i j k l m n"
foreach w of local later_waves {
	append using `w'pjunk.dta
}

// The per-wave temporary files are no longer needed once stacked
foreach w in a `later_waves' {
	erase `w'pjunk.dta
}

// Keep a copy of the pooled long-format data
save Partner_data_problem, replace
 
// Build the partner look-up file from the pooled long-format data that is
// currently in memory. Each row kept here describes a person who reports a
// partner in the household. After the renames below the row is keyed by the
// PARTNER's person number (hidp, pno, wave) and its characteristics carry the
// prefix _sp_, so merging it onto a person-level file attaches to each person
// the characteristics of his/her spouse/partner.

// Restrict to individuals who have a spouse/partner in the household.
// If an individual does not have a partner then ppno is 0; if they do have
// a partner then ppno is the pno of their partner.
// Stata treats missing values as larger than any number, so a missing ppno
// would pass the "> 0" test; it is excluded explicitly.
keep if ppno > 0 & !missing(ppno)

// The following two cases allow me to check that the two main members of the
// household are there, and no one else. The second household (68068024) has
// three children that have disappeared, because the condition that there is
// a partner (ppno > 0) must be fulfilled.
// sex_dv is written out in full instead of relying on variable abbreviation.
browse hidp wave sex_dv ppno age_dv if hidp==68054416
browse hidp wave sex_dv ppno age_dv if hidp==68068024

// restrict just to male
*keep if sex_dv == 1

// 'pno' is the individual's own number on the household grid. It is unique
// within a household but not across waves.
// 'ppno' is the partner's person number.

// Rename the spouse/partner pno variable to pno, so that the row can be
// matched to the partner's own record.
rename ppno pno

// Rename the individual characteristics so that the names indicate that they
// refer to the spouse/partner (prefix _sp_). The key variables hidp, pno and
// wave keep their names.
foreach v in jbstat sex_dv age_dv agegr10_dv agegr13_dv scopfamb scopfamd ///
	scopfamf jbisco88 jbsoc10_cc jbsoc00_cc jobdeny hiqual_dv stendreas ///
	jbnssec8_dv nxtendreas jbiindb_dv jbnssec3_dv jbnssec5_dv jbnssec_dv ///
	qfhigh_dv jbseg_dv jbrgsc_dv {
	rename `v' _sp_`v'
}

// mastat_dv and jbisco88_cc were also selected as partner characteristics.
// Without the prefix they would enter the merged file under the person's own
// variable names. They are renamed only if still present under that exact
// name, since they may be absent or already renamed through abbreviation.
foreach v in mastat_dv jbisco88_cc {
	capture confirm variable `v', exact
	if _rc == 0 {
		rename `v' _sp_`v'
	}
}

// NOTE(review): pidp (and pid/ppid, when present) keep their names. On these
// rows they identify the person who reported the partner, not the person the
// row is now keyed to through pno.

// save the file temporarily
save tmp_spinfo, replace

// Attach the partner look-up file to the person-level file Prueba_partner,
// matching on household identifier, person number and wave.

use Prueba_partner.dta

merge 1:1 hidp pno wave using tmp_spinfo

// The two commands below let me verify that each member of a couple has the
// sex of his/her partner correctly attached. A video on how to match
// partner's information with UKHLS is available at:
// https://open.essex.ac.uk/course/section.php?id=1028

// In the second command the last column (_merge) shows that _merge==1
// corresponds to individuals whose partner was not in the household at the
// time of the interview (ppno=0).

order hidp pno ppno sex_dv _sp_sex_dv
browse hidp wave pno ppno sex_dv _sp_sex_dv _merge

// The next command shows that _merge==2 consists of "shadows": individuals
// stated that they had a partner, but the partner's own information is not
// there, perhaps because the partner was not interviewed. This is the reason
// why the second line in what comes next does not have a PIDP.

browse hidp wave pno ppno sex_dv _sp_sex_dv if _merge == 2

// The observations not matched from using (10,985 cases) are those where the
// interviewed person stated that they had a partner, but the partner was not
// interviewed. For instance, I took one random hidp among the _merge==2 cases:
// you can see that the person is the same. That is also why the duplicates
// report finds some duplicates; once these cases are dropped there is no
// problem.
browse pidp wave pno ppno sex_dv _sp_sex_dv age_dv _sp_age_dv _merge if hidp == 68061203

duplicates report pidp wave

// Discard the rows that exist only in the partner look-up file
keep if _merge != 2
// Indeed, running the report again shows no duplicates any more
duplicates report pidp wave

// _merge has to go, otherwise future merges will not work
drop _merge

// Store the result, overwriting the person-level file
save Prueba_partner, replace

// The temporary look-up file is no longer needed
erase tmp_spinfo.dta



* MERGE OF THE PREVIOUS FILE WITH THE INFORMATION TO BE EXTRACTED FROM THE 'XWAVEDAT' FILE

	* The information of interest in that file concerns the individual's ethnic and immigrant origin.

* Many person-wave rows per pidp are matched to one row per pidp in the using
* file. nogenerate means no _merge variable is created, so none has to be
* dropped afterwards.
merge m:1 pidp using "G:\Mi unidad\Word\Datos\Understanding Society\US Waves 1_14 (2009_22) BHPS Waves 1_18 (1991_2009)\stata\stata13_se\ukhls\xwavedat.dta", nogenerate

save Prueba_partner, replace

